// Do not pause output with -more- prompts, so the do-file runs unattended.
set more off
// Close any log left open by an earlier run; -capture- suppresses the error
// that -log close- raises when no log is open.
capture log close

//  program:    AnanatEtAl_test_score_trends
//  task:       Run regressions for the test score results, including
//              state-specific linear time trends

// Change the working directory to the folder that holds the data.
// NOTE: the line below is a placeholder, not a valid path; replace it with the
// actual directory before running.
cd *** Fill in path name here ***

// Create the log file as plain text, overwriting any existing log of the same name.
log using AnanatEtAl_test_score_trends, replace text

// Load the merged BLS/NAEP data, discarding whatever is currently in memory.
use AnanatEtAl_test_score_data, clear

// Restrict to the balanced panel of states that have both NAEP and BLS data
// for all years (CA, CT, GA, IN, LA, MA, MI, MN, MO, NY, NC, SC, TN, TX, VA).
keep if inlist(stateno, 5, 7, 11, 15, 19, 22, 23, 24, 26, 33, 34, 41, 43, 44, 47)

// Create one linear trend per state dummy: trend<dummy name> = year * dummy.
// -foreach- replaces the out-of-date -for- command; the variables generated
// (e.g. trendstated1 from stated1) are named exactly as before, so the trend*
// wildcard used in the regressions still matches them.
foreach dummy of varlist stated* {
    quietly generate trend`dummy' = year * `dummy'
}

// Declare the panel structure: states observed over years.
xtset stateno year

// Full-sample 2SLS regressions. Each model instruments seppctyrlag with
// ticpctyrlag, controls for the yrd*, stated* and trend* variables, uses the
// relevant test-taker count as a frequency weight, clusters standard errors
// by state, and reports the first stage.

// Pieces of the specification shared by all three models.
local controls  "yrd* stated* trend*"
local ivpart    "(seppctyrlag = ticpctyrlag)"
local tableopts "drop(yrd* stated* trend*) excel dec(3)"

// All students, weighted by sample_size. This call starts the results table,
// so it overwrites any existing test_score_trends output (replace).
ivregress 2sls z_all `controls' `ivpart' ///
    [fweight=sample_size], vce(cluster stateno) first
estimates store ALL
outreg2 [ALL] using test_score_trends, `tableopts' replace

// Black students, weighted by n_black. Added to the existing table.
ivregress 2sls z_black `controls' `ivpart' ///
    [fweight=n_black], vce(cluster stateno) first
estimates store BLACK
outreg2 [BLACK] using test_score_trends, `tableopts'

// White students, weighted by n_white. Added to the existing table.
ivregress 2sls z_white `controls' `ivpart' ///
    [fweight=n_white], vce(cluster stateno) first
estimates store WHITE
outreg2 [WHITE] using test_score_trends, `tableopts'

** State-years with unemployment at or below the yearly median **
// Compute, for each year, the median of lagged unemployment across the states
// currently in memory (the balanced panel retained earlier in this file).
bysort year: egen cutoff = pctile(unemplag), p(50)

// Keep state-years at or below that median. The explicit missing() guard is
// needed because Stata evaluates (. <= .) as true: if unemplag were missing
// for every state in a year, cutoff would also be missing and those rows
// would otherwise be kept in the low-unemployment sample.
keep if unemplag <= cutoff & !missing(unemplag)

// Same 2SLS specification as the full sample, weighted by test-taker sample size.
ivregress 2sls z_all yrd* stated* trend* (seppctyrlag = ticpctyrlag) [fweight=sample_size], vce(cluster stateno) first
est store LOWUNEMP
outreg2 [LOWUNEMP] using test_score_trends, drop(yrd* stated* trend*) excel dec(3)

// Drop the subsample (and stored estimates) from memory; the results are
// already written to the output table.
clear all

** State-years with unemployment above the yearly median **
// Reload the data from disk so this subsample is built from the full file.
use AnanatEtAl_test_score_data, clear

// Restrict to the balanced panel of states that have both NAEP and BLS data
// for all years (CA, CT, GA, IN, LA, MA, MI, MN, MO, NY, NC, SC, TN, TX, VA).
keep if inlist(stateno, 5, 7, 11, 15, 19, 22, 23, 24, 26, 33, 34, 41, 43, 44, 47)

// Compute, for each year, the median of lagged unemployment across the
// retained panel states.
bysort year: egen cutoff = pctile(unemplag), p(50)

// Keep state-years strictly above that median. Stata treats a missing value
// as larger than any number, so without the missing() guard a state-year with
// missing unemplag would be classified as "above median".
keep if unemplag > cutoff & !missing(unemplag)

// Create one linear trend per state dummy: trend<dummy name> = year * dummy.
// -foreach- replaces the out-of-date -for- command; variable names are unchanged.
foreach dummy of varlist stated* {
    quietly generate trend`dummy' = year * `dummy'
}

// Declare the panel structure: states observed over years.
xtset stateno year

// Same 2SLS specification as the full sample, weighted by test-taker sample size.
ivregress 2sls z_all yrd* stated* trend* (seppctyrlag = ticpctyrlag) [fweight=sample_size], vce(cluster stateno) first
est store HIGHUNEMP
outreg2 [HIGHUNEMP] using test_score_trends, drop(yrd* stated* trend*) excel dec(3)

// Clear data and stored estimates, then close the log opened at the top of the file.
clear all

log close

